From 0e2714b61dec3bd0130a6a6d5b1308b85f2c658b Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sun, 18 Jun 2006 12:42:16 +0000 Subject: [PATCH] Added a new method to update the cache of all pages linking to a given page without using excessive time or memory. This turns out to be a common operation in MediaWiki, I needed to fix callers in a lot of files. Backwards compatibility is maintained (of course). Also did some cleanup in Article.php, another 3 years of that and it'll be looking pretty. New job type added to the job queue, some refactoring done in JobQueue.php to support it. --- includes/Article.php | 174 +++++++++----------------- includes/AutoLoader.php | 4 +- includes/DefaultSettings.php | 11 ++ includes/HTMLCacheUpdate.php | 230 +++++++++++++++++++++++++++++++++++ includes/Image.php | 30 ++--- includes/ImagePage.php | 4 +- includes/JobQueue.php | 182 ++++++++++++++------------- includes/LinksUpdate.php | 62 +++++++--- includes/SquidUpdate.php | 8 +- includes/Title.php | 86 ++++++------- 10 files changed, 498 insertions(+), 293 deletions(-) create mode 100644 includes/HTMLCacheUpdate.php diff --git a/includes/Article.php b/includes/Article.php index d3c9ca294f..13dc5a0b1d 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -1181,7 +1181,6 @@ class Article { # Update the page record with revision data $this->updateRevisionOn( $dbw, $revision, 0 ); - Article::onArticleCreate( $this->mTitle ); if(!$suppressRC) { $rcid = RecentChange::notifyNew( $now, $this->mTitle, $isminor, $wgUser, $summary, 'default', '', strlen( $text ), $revisionId ); @@ -1207,11 +1206,14 @@ class Article { 'page_title' => $ttl ), $fname ); - # standard deferred updates + # Update links, etc. 
$this->editUpdates( $text, $summary, $isminor, $now, $revisionId ); - $oldid = 0; # new article - $this->showArticle( $text, wfMsg( 'newarticle' ), false, $isminor, $now, $summary, $oldid ); + # Clear caches + Article::onArticleCreate( $this->mTitle ); + + # Output a redirect back to the article + $this->doRedirect( $this->isRedirect( $text ) ); wfRunHooks( 'ArticleInsertComplete', array( &$this, &$wgUser, $text, $summary, $isminor, @@ -1318,7 +1320,6 @@ class Article { 'text' => $text ) ); - $dbw->immediateCommit(); $dbw->begin(); $revisionId = $revision->insertOn( $dbw ); @@ -1330,7 +1331,7 @@ class Article { $good = false; $dbw->rollback(); } else { - # Update recentchanges and purge cache and whatnot + # Update recentchanges $bot = (int)($wgUser->isBot() || $forceBot); $rcid = RecentChange::notifyEdit( $now, $this->mTitle, $isminor, $wgUser, $summary, $lastRevision, $this->getTimestamp(), $bot, '', $oldsize, $newsize, @@ -1342,9 +1343,6 @@ class Article { } $dbw->commit(); - - // Update caches outside the main transaction - Article::onArticleEdit( $this->mTitle ); } } else { // Keep the same revision ID, but do some updates on it @@ -1356,52 +1354,28 @@ class Article { } if ( $good ) { + # Invalidate cache of this article and all pages using this article + # as a template. Partly deferred. + Article::onArticleEdit( $this->mTitle ); + if ($watchthis) { if (!$this->mTitle->userIsWatching()) { - $dbw->immediateCommit(); $dbw->begin(); $this->doWatch(); $dbw->commit(); } } else { if ( $this->mTitle->userIsWatching() ) { - $dbw->immediateCommit(); $dbw->begin(); $this->doUnwatch(); $dbw->commit(); } } - # standard deferred updates + # Update links tables, site stats, etc. 
$this->editUpdates( $text, $summary, $minor, $now, $revisionId ); - - $urls = array(); - # Invalidate caches of all articles using this article as a template - - # Template namespace - # Purge all articles linking here - $titles = $this->mTitle->getTemplateLinksTo(); - Title::touchArray( $titles ); - if ( $wgUseSquid ) { - foreach ( $titles as $title ) { - $urls[] = $title->getInternalURL(); - } - } - - # Squid updates - if ( $wgUseSquid ) { - $urls = array_merge( $urls, $this->mTitle->getSquidURLs() ); - $u = new SquidUpdate( $urls ); - array_push( $wgPostCommitUpdateList, $u ); - } - - # File cache - if ( $wgUseFileCache ) { - $cm = new CacheManager($this->mTitle); - @unlink($cm->fileCacheName()); - } - - $this->showArticle( $text, wfMsg( 'updated' ), $sectionanchor, $isminor, $now, $summary, $lastRevision ); + # Output a redirect back to the article + $this->doRedirect( $this->isRedirect( $text ), $sectionanchor ); } wfRunHooks( 'ArticleSaveComplete', array( &$this, &$wgUser, $text, @@ -1412,26 +1386,29 @@ class Article { } /** - * After we've either updated or inserted the article, update - * the link tables and redirect to the new page. - * @todo FIXME some function arguments never used + * @deprecated wrapper for doRedirect */ function showArticle( $text, $subtitle , $sectionanchor = '', $me2, $now, $summary, $oldid ) { - global $wgOut; - - $fname = 'Article::showArticle'; - wfProfileIn( $fname ); - - # Output the redirect - if( $this->isRedirect( $text ) ) - $r = 'redirect=no'; - else - $r = ''; - $wgOut->redirect( $this->mTitle->getFullURL( $r ).$sectionanchor ); - - wfProfileOut( $fname ); + $this->doRedirect( $this->isRedirect( $text ), $sectionanchor ); } + /** + * Output a redirect back to the article. + * This is typically used after an edit. 
+ * + * @param boolean $noRedir Add redirect=no + * @param string $sectionAnchor section to redirect to, including "#" + */ + function doRedirect( $noRedir = false, $sectionAnchor = '' ) { + global $wgOut; + if ( $noRedir ) { + $query = 'redirect=no'; + } else { + $query = ''; + } + $wgOut->redirect( $this->mTitle->getFullURL( $query ) . $sectionAnchor ); + } + /** * Mark this particular edit as patrolled */ @@ -1927,24 +1904,6 @@ class Article { $u = new SiteStatsUpdate( 0, 1, -(int)$this->isCountable( $this->getContent() ), -1 ); array_push( $wgDeferredUpdateList, $u ); - $linksTo = $this->mTitle->getLinksTo(); - - # Squid purging - if ( $wgUseSquid ) { - $urls = array( - $this->mTitle->getInternalURL(), - $this->mTitle->getInternalURL( 'history' ) - ); - - $u = SquidUpdate::newFromTitles( $linksTo, $urls ); - array_push( $wgPostCommitUpdateList, $u ); - - } - - # Client and file cache invalidation - Title::touchArray( $linksTo ); - - // For now, shunt the revision data into the archive table. // Text is *not* removed from the text table; bulk storage // is left intact to avoid breaking block-compression or @@ -1985,6 +1944,7 @@ class Article { # Finally, clean up the link tables $t = $this->mTitle->getPrefixedDBkey(); + # Clear caches Article::onArticleDelete( $this->mTitle ); # Delete outgoing links @@ -2042,12 +2002,10 @@ class Article { $tt = $this->mTitle->getDBKey(); $n = $this->mTitle->getNamespace(); - # Get the last editor, lock table exclusively - $dbw->begin(); + # Get the last editor $current = Revision::newFromTitle( $this->mTitle ); if( is_null( $current ) ) { # Something wrong... no page? - $dbw->rollback(); $wgOut->addHTML( wfMsg( 'notanarticle' ) ); return; } @@ -2082,7 +2040,6 @@ class Article { ); if( $s === false ) { # Something wrong - $dbw->rollback(); $wgOut->setPageTitle(wfMsg('rollbackfailed')); $wgOut->addHTML( wfMsg( 'cantrollback' ) ); return; @@ -2119,9 +2076,7 @@ class Article { $wgOut->addHTML( '

' . htmlspecialchars( $newComment ) . "

\n
\n" ); $this->updateArticle( $target->getText(), $newComment, 1, $this->mTitle->userIsWatching(), $bot ); - Article::onArticleEdit( $this->mTitle ); - $dbw->commit(); $wgOut->returnToMain( false ); } @@ -2149,7 +2104,9 @@ class Article { /** * Do standard deferred updates after page edit. + * Update links tables, site stats, search index and message cache. * Every 1000th edit, prune the recent changes table. + * * @private * @param string $text */ @@ -2447,27 +2404,22 @@ class Article { * @param $title_obj a title object */ - function onArticleCreate($title_obj) { - global $wgUseSquid, $wgPostCommitUpdateList; - - $title_obj->touchLinks(); - $titles = $title_obj->getLinksTo(); - - # Purge squid - if ( $wgUseSquid ) { - $urls = $title_obj->getSquidURLs(); - foreach ( $titles as $linkTitle ) { - $urls[] = $linkTitle->getInternalURL(); - } - $u = new SquidUpdate( $urls ); - array_push( $wgPostCommitUpdateList, $u ); - } + static function onArticleCreate($title) { + $title->touchLinks(); + $title->purgeSquid(); } - function onArticleDelete( $title ) { - global $wgMessageCache; + static function onArticleDelete( $title ) { + global $wgUseFileCache, $wgMessageCache; $title->touchLinks(); + $title->purgeSquid(); + + # File cache + if ( $wgUseFileCache ) { + $cm = new CacheManager( $title ); + @unlink( $cm->fileCacheName() ); + } if( $title->getNamespace() == NS_MEDIAWIKI) { $wgMessageCache->replace( $title->getDBkey(), false ); @@ -2477,31 +2429,19 @@ class Article { /** * Purge caches on page update etc */ - function onArticleEdit( $title ) { - global $wgUseSquid, $wgPostCommitUpdateList, $wgUseFileCache; + static function onArticleEdit( $title ) { + global $wgDeferredUpdateList, $wgUseFileCache; $urls = array(); - // Template namespace? Purge all articles linking here. - // FIXME: When a templatelinks table arrives, use it for all includes. 
- if ( $title->getNamespace() == NS_TEMPLATE) { - $titles = $title->getLinksTo(); - Title::touchArray( $titles ); - if ( $wgUseSquid ) { - foreach ( $titles as $link ) { - $urls[] = $link->getInternalURL(); - } - } - } + // Invalidate caches of articles which include this page + $update = new HTMLCacheUpdate( $title, 'templatelinks' ); + $wgDeferredUpdateList[] = $update; - # Squid updates - if ( $wgUseSquid ) { - $urls = array_merge( $urls, $title->getSquidURLs() ); - $u = new SquidUpdate( $urls ); - array_push( $wgPostCommitUpdateList, $u ); - } + # Purge squid for this page only + $title->purgeSquid(); - # File cache + # Clear file cache if ( $wgUseFileCache ) { $cm = new CacheManager( $title ); @unlink( $cm->fileCacheName() ); diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index d60339a4d2..5c0122a2dd 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -80,6 +80,8 @@ function __autoload($class_name) { 'ConcatenatedGzipHistoryBlob' => 'HistoryBlob.php', 'HistoryBlobStub' => 'HistoryBlob.php', 'HistoryBlobCurStub' => 'HistoryBlob.php', + 'HTMLCacheUpdate' => 'HTMLCacheUpdate.php', + 'HTMLCacheUpdateJob' => 'HTMLCacheUpdate.php', 'Image' => 'Image.php', 'ThumbnailImage' => 'Image.php', 'ImageGallery' => 'ImageGallery.php', @@ -224,4 +226,4 @@ function __autoload($class_name) { } } -?> \ No newline at end of file +?> diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index d1169a80a2..321a767506 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -62,6 +62,7 @@ $wgProto = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') ? 'https' : ' $wgServer = $wgProto.'://' . 
$wgServerName; # If the port is a non-standard one, add it to the URL if( isset( $_SERVER['SERVER_PORT'] ) + && !strpos( $wgServerName, ':' ) && ( ( $wgProto == 'http' && $_SERVER['SERVER_PORT'] != 80 ) || ( $wgProto == 'https' && $_SERVER['SERVER_PORT'] != 443 ) ) ) { @@ -2060,6 +2061,16 @@ $wgJobRunRate = 1; */ $wgJobLogFile = false; +/** + * Number of rows to update per job + */ +$wgUpdateRowsPerJob = 500; + +/** + * Number of rows to update per query + */ +$wgUpdateRowsPerQuery = 10; + /** * Enable use of AJAX features, currently auto suggestion for the search bar */ diff --git a/includes/HTMLCacheUpdate.php b/includes/HTMLCacheUpdate.php new file mode 100644 index 0000000000..dd7c53bc2e --- /dev/null +++ b/includes/HTMLCacheUpdate.php @@ -0,0 +1,230 @@ +mTitle = $titleTo; + $this->mTable = $table; + $this->mRowsPerJob = $wgUpdateRowsPerJob; + $this->mRowsPerQuery = $wgUpdateRowsPerQuery; + } + + function doUpdate() { + # Fetch the IDs + $cond = $this->getToCondition(); + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( $this->mTable, $this->getFromField(), $cond, __METHOD__ ); + $resWrap = new ResultWrapper( $dbr, $res ); + if ( $dbr->numRows( $res ) != 0 ) { + if ( $dbr->numRows( $res ) > $this->mRowsPerJob ) { + $this->insertJobs( $resWrap ); + } else { + $this->invalidateIDs( $resWrap ); + } + } + $dbr->freeResult( $res ); + } + + function insertJobs( ResultWrapper $res ) { + $numRows = $res->numRows(); + $numBatches = ceil( $numRows / $this->mRowsPerJob ); + $realBatchSize = $numRows / $numBatches; + $boundaries = array(); + $start = false; + $jobs = array(); + do { + for ( $i = 0; $i < $realBatchSize - 1; $i++ ) { + $row = $res->fetchRow(); + if ( $row ) { + $id = $row[0]; + } else { + $id = false; + break; + } + } + if ( $id !== false ) { + // One less on the end to avoid duplicating the boundary + $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, $start, $id - 1 ); + } else { + $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, 
$start, false ); + } + $jobs[] = $job; + + $start = $id; + } while ( $start ); + + Job::batchInsert( $jobs ); + } + + function getPrefix() { + static $prefixes = array( + 'pagelinks' => 'pl', + 'imagelinks' => 'il', + 'categorylinks' => 'cl', + 'templatelinks' => 'tl', + + # Not needed + # 'externallinks' => 'el', + # 'langlinks' => 'll' + ); + + if ( is_null( $this->mPrefix ) ) { + $this->mPrefix = $prefixes[$this->mTable]; + if ( is_null( $this->mPrefix ) ) { + throw new MWException( "Invalid table type \"{$this->mTable}\" in " . __CLASS__ ); + } + } + return $this->mPrefix; + } + + function getFromField() { + return $this->getPrefix() . '_from'; + } + + function getToCondition() { + switch ( $this->mTable ) { + case 'pagelinks': + return array( + 'pl_namespace' => $this->mTitle->getNamespace(), + 'pl_title' => $this->mTitle->getDBkey() + ); + case 'templatelinks': + return array( + 'tl_namespace' => $this->mTitle->getNamespace(), + 'tl_title' => $this->mTitle->getDBkey() + ); + case 'imagelinks': + return array( 'il_to' => $this->mTitle->getDBkey() ); + case 'categorylinks': + return array( 'cl_to' => $this->mTitle->getDBkey() ); + } + throw new MWException( 'Invalid table type in ' . __CLASS__ ); + } + + /** + * Invalidate a set of IDs, right now + */ + function invalidateIDs( ResultWrapper $res ) { + global $wgUseFileCache, $wgUseSquid; + + if ( $res->numRows() == 0 ) { + return; + } + + $dbw =& wfGetDB( DB_MASTER ); + $timestamp = $dbw->timestamp(); + $done = false; + + while ( !$done ) { + # Get all IDs in this query into an array + $ids = array(); + for ( $i = 0; $i < $this->mRowsPerQuery; $i++ ) { + $row = $res->fetchRow(); + if ( $row ) { + $ids[] = $row[0]; + } else { + $done = true; + break; + } + } + + if ( !count( $ids ) ) { + break; + } + + # Update page_touched + $dbw->update( 'page', + array( 'page_touched' => $timestamp ), + array( 'page_id IN (' . $dbw->makeList( $ids ) . 
')' ), + __METHOD__ + ); + + # Update squid + if ( $wgUseSquid || $wgUseFileCache ) { + $titles = Title::newFromIDs( $ids ); + if ( $wgUseSquid ) { + $u = SquidUpdate::newFromTitles( $titles ); + $u->doUpdate(); + } + + # Update file cache + if ( $wgUseFileCache ) { + foreach ( $titles as $title ) { + $cm = new CacheManager($title); + @unlink($cm->fileCacheName()); + } + } + } + } + } +} + +class HTMLCacheUpdateJob extends Job { + var $table, $start, $end; + + /** + * Construct a job + * @param Title $title The title linked to + * @param string $table The name of the link table. + * @param integer $start Beginning page_id or false for open interval + * @param integer $end End page_id or false for open interval + * @param integer $id job_id + */ + function __construct( $title, $table, $start, $end, $id = 0 ) { + $params = array( + 'table' => $table, + 'start' => $start, + 'end' => $end ); + parent::__construct( 'html_cache_update', $title, $params, $id ); + $this->table = $table; + $this->start = intval( $start ); + $this->end = intval( $end ); + } + + function run() { + $update = new HTMLCacheUpdate( $this->title, $this->table ); + + $fromField = $update->getFromField(); + $conds = $update->getToCondition(); + if ( $this->start ) { + $conds[] = "$fromField >= {$this->start}"; + } + if ( $this->end ) { + $conds[] = "$fromField <= {$this->end}"; + } + + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( $this->table, $fromField, $conds, __METHOD__ ); + $update->invalidateIDs( new ResultWrapper( $dbr, $res ) ); + $dbr->freeResult( $res ); + + return true; + } +} +?> diff --git a/includes/Image.php b/includes/Image.php index dd40c0f2d2..3573d3653f 100644 --- a/includes/Image.php +++ b/includes/Image.php @@ -1316,16 +1316,8 @@ class Image $this->purgeDescription(); // Purge cache of all pages using this image - $linksTo = $this->getLinksTo(); - global $wgUseSquid, $wgPostCommitUpdateList; - if ( $wgUseSquid ) { - $u = SquidUpdate::newFromTitles( $linksTo, $urlArr ); - 
array_push( $wgPostCommitUpdateList, $u ); - } - - // Invalidate parser cache and client cache for pages using this image - // This is left until relatively late to reduce lock time - Title::touchArray( $linksTo ); + $update = new HTMLCacheUpdate( $this->getTitle(), 'imagelinks' ); + $update->doUpdate(); } function checkDBSchema(&$db) { @@ -1461,7 +1453,7 @@ class Image * Record an image upload in the upload log and the image table */ function recordUpload( $oldver, $desc, $license = '', $copyStatus = '', $source = '', $watch = false ) { - global $wgUser, $wgUseCopyrightUpload, $wgUseSquid, $wgPostCommitUpdateList; + global $wgUser, $wgUseCopyrightUpload; $fname = 'Image::recordUpload'; $dbw =& wfGetDB( DB_MASTER ); @@ -1528,8 +1520,6 @@ class Image $fname, 'IGNORE' ); - $descTitle = $this->getTitle(); - $purgeURLs = array(); if( $dbw->affectedRows() == 0 ) { # Collision, this is an update of an image @@ -1575,6 +1565,7 @@ class Image $dbw->query( "UPDATE $site_stats SET ss_images=ss_images+1", $fname ); } + $descTitle = $this->getTitle(); $article = new Article( $descTitle ); $minor = false; $watch = $watch || $wgUser->isWatched( $descTitle ); @@ -1588,7 +1579,7 @@ class Image # Invalidate the cache for the description page $descTitle->invalidateCache(); - $purgeURLs[] = $descTitle->getInternalURL(); + $descTitle->purgeSquid(); } else { // New image; create the description page. 
$article->insertNewArticle( $textdesc, $desc, $minor, $watch, $suppressRC ); @@ -1603,13 +1594,8 @@ class Image $dbw->immediateCommit(); # Invalidate cache for all pages using this image - $linksTo = $this->getLinksTo(); - - if ( $wgUseSquid ) { - $u = SquidUpdate::newFromTitles( $linksTo, $purgeURLs ); - array_push( $wgPostCommitUpdateList, $u ); - } - Title::touchArray( $linksTo ); + $update = new HTMLCacheUpdate( $this->getTitle(), 'imagelinks' ); + $update->doUpdate(); return true; } @@ -1619,6 +1605,8 @@ class Image * Also adds their IDs to the link cache * * This is mostly copied from Title::getLinksTo() + * + * @deprecated Use HTMLCacheUpdate, this function uses too much memory */ function getLinksTo( $options = '' ) { $fname = 'Image::getLinksTo'; diff --git a/includes/ImagePage.php b/includes/ImagePage.php index 8a5e166ba3..fbd8297814 100644 --- a/includes/ImagePage.php +++ b/includes/ImagePage.php @@ -631,8 +631,8 @@ END $this->img = new Image( $this->mTitle ); if( $this->img->exists() ) { wfDebug( "ImagePage::doPurge purging " . $this->img->getName() . 
"\n" ); - $linksTo = $this->img->getLinksTo(); - Title::touchArray( $linksTo ); + $update = new HTMLCacheUpdate( $this->mTitle, 'imagelinks' ); + $update->doUpdate(); $this->img->purgeCache(); } else { wfDebug( "ImagePage::doPurge no image\n" ); diff --git a/includes/JobQueue.php b/includes/JobQueue.php index 831d37e8d4..f82b16f899 100644 --- a/includes/JobQueue.php +++ b/includes/JobQueue.php @@ -4,7 +4,7 @@ if ( !defined( 'MEDIAWIKI' ) ) { die( "This file is part of MediaWiki, it is not a valid entry point\n" ); } -class Job { +abstract class Job { var $command, $title, $params, @@ -15,50 +15,37 @@ class Job { /*------------------------------------------------------------------------- * Static functions *------------------------------------------------------------------------*/ + + /** + * @deprecated use LinksUpdate::queueRecursiveJobs() + */ /** - * Add an array of refreshLinks jobs to the queue - * @param array $titles Array of title objects. - * @static + * static function queueLinksJobs( $titles ) {} */ - function queueLinksJobs( $titles ) { - $fname = 'Job::queueLinksJobs'; - wfProfileIn( $fname ); - $batchSize = 100; - for( $i = 0; $i < count( $titles ); $i += $batchSize ) { - $batch = array_slice( $titles, $i, $batchSize, true ); - $jobs = array(); - foreach( $batch as $title ) { - $jobs[] = new Job( 'refreshLinks', $title ); - } - Job::batchInsert( $jobs ); - } - wfProfileOut( $fname ); - } /** * Pop a job off the front of the queue * @static * @return Job or false if there's no jobs */ - function pop() { - $fname = 'Job::pop'; - wfProfileIn( $fname ); + static function pop() { + wfProfileIn( __METHOD__ ); $dbr =& wfGetDB( DB_SLAVE ); // Get a job from the slave - $row = $dbr->selectRow( 'job', '*', '', $fname, + $row = $dbr->selectRow( 'job', '*', '', __METHOD__, array( 'ORDER BY' => 'job_id', 'LIMIT' => 1 ) ); if ( $row === false ) { - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } // Try to delete it from the master $dbw =& 
wfGetDB( DB_MASTER ); - $dbw->delete( 'job', array( 'job_id' => $row->job_id ), $fname ); + $dbw->delete( 'job', array( 'job_id' => $row->job_id ), __METHOD__ ); $affected = $dbw->affectedRows(); $dbw->immediateCommit(); @@ -66,30 +53,30 @@ class Job { // Failed, someone else beat us to it // Try getting a random row $row = $dbw->selectRow( 'job', array( 'MIN(job_id) as minjob', - 'MAX(job_id) as maxjob' ), '', $fname ); + 'MAX(job_id) as maxjob' ), '', __METHOD__ ); if ( $row === false || is_null( $row->minjob ) || is_null( $row->maxjob ) ) { // No jobs to get - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } // Get the random row $row = $dbw->selectRow( 'job', '*', - array( 'job_id' => mt_rand( $row->minjob, $row->maxjob ) ), $fname ); + array( 'job_id' => mt_rand( $row->minjob, $row->maxjob ) ), __METHOD__ ); if ( $row === false ) { // Random job gone before we got the chance to select it // Give up - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } // Delete the random row - $dbw->delete( 'job', array( 'job_id' => $row->job_id ), $fname ); + $dbw->delete( 'job', array( 'job_id' => $row->job_id ), __METHOD__ ); $affected = $dbw->affectedRows(); $dbw->immediateCommit(); if ( !$affected ) { // Random job gone before we exclusively deleted it // Give up - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } } @@ -99,20 +86,50 @@ class Job { $namespace = $row->job_namespace; $dbkey = $row->job_title; $title = Title::makeTitleSafe( $namespace, $dbkey ); - $job = new Job( $row->job_cmd, $title, $row->job_params, $row->job_id ); + $job = Job::factory( $row->job_cmd, $title, Job::extractBlob( $row->job_params ), $row->job_id ); // Remove any duplicates it may have later in the queue - $dbw->delete( 'job', $job->insertFields(), $fname ); + $dbw->delete( 'job', $job->insertFields(), __METHOD__ ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $job; } + /** + * Create an object of a subclass + */ + 
static function factory( $command, $title, $params = false, $id = 0 ) { + switch ( $command ) { + case 'refreshLinks': + return new RefreshLinksJob( $title, $params, $id ); + case 'html_cache_update': + return new HTMLCacheUpdateJob( $title, $params['table'], $params['start'], $params['end'], $id ); + default: + throw new MWException( "Invalid job command \"$command\"" ); + } + } + + static function makeBlob( $params ) { + if ( $params !== false ) { + return serialize( $params ); + } else { + return ''; + } + } + + static function extractBlob( $blob ) { + if ( (string)$blob !== '' ) { + return unserialize( $blob ); + } else { + return false; + } + } + /*------------------------------------------------------------------------- * Non-static functions *------------------------------------------------------------------------*/ - function Job( $command, $title, $params = '', $id = 0 ) { + function __construct( $command, $title, $params = false, $id = 0 ) { $this->command = $command; $this->title = $title; $this->params = $params; @@ -127,20 +144,18 @@ class Job { * Insert a single job into the queue. */ function insert() { - $fname = 'Job::insert'; - $fields = $this->insertFields(); $dbw =& wfGetDB( DB_MASTER ); if ( $this->removeDuplicates ) { - $res = $dbw->select( 'job', array( '1' ), $fields, $fname ); + $res = $dbw->select( 'job', array( '1' ), $fields, __METHOD__ ); if ( $dbw->numRows( $res ) ) { return; } } $fields['job_id'] = $dbw->nextSequenceValue( 'job_job_id_seq' ); - $dbw->insert( 'job', $fields, $fname ); + $dbw->insert( 'job', $fields, __METHOD__ ); } protected function insertFields() { @@ -148,7 +163,7 @@ class Job { 'job_cmd' => $this->command, 'job_namespace' => $this->title->getNamespace(), 'job_title' => $this->title->getDBkey(), - 'job_params' => $this->params + 'job_params' => Job::makeBlob( $this->params ) ); } @@ -162,16 +177,14 @@ class Job { * @param $jobs array of Job objects */ static function batchInsert( $jobs ) { - $fname = __CLASS__ . 
'::' . __FUNCTION__; - if( count( $jobs ) ) { $dbw = wfGetDB( DB_MASTER ); $dbw->begin(); foreach( $jobs as $job ) { $rows[] = $job->insertFields(); } - $dbw->insert( 'job', $rows, $fname, 'IGNORE' ); - $dbw->immediateCommit(); + $dbw->insert( 'job', $rows, __METHOD__, 'IGNORE' ); + $dbw->commit(); } } @@ -179,35 +192,47 @@ class Job { * Run the job * @return boolean success */ - function run() { - $fname = 'Job::run'; - wfProfileIn( $fname ); - switch ( $this->command ) { - case 'refreshLinks': - $retval = $this->refreshLinks(); - break; - default: - $retval = true; - if( wfRunHooks( 'RunUnknownJob', array( &$this, &$retval ) ) ) { - $this->error = "Invalid job type {$this->command}, ignoring"; - wfDebug( $this->error . "\n" ); - $retval = false; - } else { - $retval = true; + abstract function run(); + + function toString() { + $paramString = ''; + if ( $this->params ) { + foreach ( $this->params as $key => $value ) { + if ( $paramString != '' ) { + $paramString .= ' '; } + $paramString .= "$key=$value"; + } + } + + if ( is_object( $this->title ) ) { + $s = "{$this->command} " . $this->title->getPrefixedDBkey(); + if ( $paramString !== '' ) { + $s .= ' ' . $paramString; + } + return $s; + } else { + return "{$this->command} $paramString"; } - wfProfileOut( $fname ); - return $retval; + } + + function getLastError() { + return $this->error; + } +} + +class RefreshLinksJob extends Job { + function __construct( $title, $params = '', $id = 0 ) { + parent::__construct( 'refreshLinks', $title, $params, $id ); } /** * Run a refreshLinks job * @return boolean success */ - function refreshLinks() { + function run() { global $wgParser; - $fname = 'Job::refreshLinks'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); # FIXME: $dbw never used. 
$dbw =& wfGetDB( DB_MASTER ); @@ -217,43 +242,28 @@ class Job { if ( is_null( $this->title ) ) { $this->error = "refreshLinks: Invalid title"; - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } $revision = Revision::newFromTitle( $this->title ); if ( !$revision ) { $this->error = 'refreshLinks: Article not found "' . $this->title->getPrefixedDBkey() . '"'; - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } - wfProfileIn( "$fname-parse" ); + wfProfileIn( __METHOD__.'-parse' ); $options = new ParserOptions; $parserOutput = $wgParser->parse( $revision->getText(), $this->title, $options, true, true, $revision->getId() ); - wfProfileOut( "$fname-parse" ); - wfProfileIn( "$fname-update" ); + wfProfileOut( __METHOD__.'-parse' ); + wfProfileIn( __METHOD__.'-update' ); $update = new LinksUpdate( $this->title, $parserOutput, false ); $update->doUpdate(); - wfProfileOut( "$fname-update" ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__.'-update' ); + wfProfileOut( __METHOD__ ); return true; } - - function toString() { - if ( is_object( $this->title ) ) { - $s = "{$this->command} " . $this->title->getPrefixedDBkey(); - if ( $this->params !== '' ) { - $s .= ', ' . 
$this->params; - } - return $s; - } else { - return "{$this->command} {$this->params}"; - } - } - - function getLastError() { - return $this->error; - } } + ?> diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php index caa1c97689..9e25bf07d0 100644 --- a/includes/LinksUpdate.php +++ b/includes/LinksUpdate.php @@ -85,7 +85,7 @@ class LinksUpdate { function doIncrementalUpdate() { $fname = 'LinksUpdate::doIncrementalUpdate'; wfProfileIn( $fname ); - + # Page links $existing = $this->getExistingLinks(); $this->incrTableUpdate( 'pagelinks', 'pl', $this->getLinkDeletions( $existing ), @@ -115,14 +115,6 @@ class LinksUpdate { $this->incrTableUpdate( 'templatelinks', 'tl', $this->getTemplateDeletions( $existing ), $this->getTemplateInsertions( $existing ) ); - # Refresh links of all pages including this page - if ( $this->mRecursive ) { - $tlto = $this->mTitle->getTemplateLinksTo(); - if ( count( $tlto ) ) { - Job::queueLinksJobs( $tlto ); - } - } - # Category links $existing = $this->getExistingCategories(); $this->incrTableUpdate( 'categorylinks', 'cl', $this->getCategoryDeletions( $existing ), @@ -132,6 +124,12 @@ class LinksUpdate { $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing ); $this->invalidateCategories( $categoryUpdates ); + # Refresh links of all pages including this page + # This will be in a separate transaction + if ( $this->mRecursive ) { + $this->queueRecursiveJobs(); + } + wfProfileOut( $fname ); } @@ -150,14 +148,6 @@ class LinksUpdate { $existing = $this->getExistingImages(); $imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing ); - # Refresh links of all pages including this page - if ( $this->mRecursive ) { - $tlto = $this->mTitle->getTemplateLinksTo(); - if ( count( $tlto ) ) { - Job::queueLinksJobs( $tlto ); - } - } - $this->dumbTableUpdate( 'pagelinks', $this->getLinkInsertions(), 'pl_from' ); 
$this->dumbTableUpdate( 'imagelinks', $this->getImageInsertions(), 'il_from' ); $this->dumbTableUpdate( 'categorylinks', $this->getCategoryInsertions(), 'cl_from' ); @@ -169,8 +159,46 @@ class LinksUpdate { $this->invalidateCategories( $categoryUpdates ); $this->invalidateImageDescriptions( $imageUpdates ); + # Refresh links of all pages including this page + # This will be in a separate transaction + if ( $this->mRecursive ) { + $this->queueRecursiveJobs(); + } + wfProfileOut( $fname ); } + + function queueRecursiveJobs() { + wfProfileIn( __METHOD__ ); + + $batchSize = 100; + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( array( 'templatelinks', 'page' ), + array( 'page_namespace', 'page_title' ), + array( + 'page_id=tl_from', + 'tl_namespace' => $this->mTitle->getNamespace(), + 'tl_title' => $this->mTitle->getDBkey() + ), __METHOD__ + ); + + $done = false; + while ( !$done ) { + $jobs = array(); + for ( $i = 0; $i < $batchSize; $i++ ) { + $row = $dbr->fetchObject( $res ); + if ( !$row ) { + $done = true; + break; + } + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $jobs[] = Job::factory( 'refreshLinks', $title ); + } + Job::batchInsert( $jobs ); + } + $dbr->freeResult( $res ); + wfProfileOut( __METHOD__ ); + } /** * Invalidate the cache of a list of pages from a single namespace diff --git a/includes/SquidUpdate.php b/includes/SquidUpdate.php index 7ccdf5bc61..bcb02ce970 100644 --- a/includes/SquidUpdate.php +++ b/includes/SquidUpdate.php @@ -54,6 +54,10 @@ class SquidUpdate { } /* static */ function newFromTitles( &$titles, $urlArr = array() ) { + global $wgMaxSquidPurgeTitles; + if ( count( $titles ) > $wgMaxSquidPurgeTitles ) { + $titles = array_slice( $titles, 0, $wgMaxSquidPurgeTitles ); + } foreach ( $titles as $title ) { $urlArr[] = $title->getInternalURL(); } @@ -77,8 +81,8 @@ class SquidUpdate { /* static */ function purge( $urlArr ) { global $wgSquidServers, $wgHTCPMulticastAddress, $wgHTCPPort; - if ( $wgSquidServers == 
'echo' ) { - echo implode("<br />\n", $urlArr); + if ( (@$wgSquidServers[0]) == 'echo' ) { + echo implode("<br />\n", $urlArr) . "<br />\n"; return; } diff --git a/includes/Title.php b/includes/Title.php index c377798de3..6c29bc6695 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -205,6 +205,21 @@ class Title { return $title; } + /** + * Make an array of titles from an array of IDs + */ + function newFromIDs( $ids ) { + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ), + 'page_id IN (' . $dbr->makeList( $ids ) . ')', __METHOD__ ); + + $titles = array(); + while ( $row = $dbr->fetchObject( $res ) ) { + $titles[] = Title::makeTitle( $row->page_namespace, $row->page_title ); + } + return $titles; + } + /** * Create a new Title from a namespace index and a DB key. * It's assumed that $ns and $title are *valid*, for instance when @@ -1572,6 +1587,9 @@ class Title { * Get an array of Title objects linking to this Title * Also stores the IDs in the link cache. * + * WARNING: do not use this function on arbitrary user-supplied titles! + * On heavily-used templates it will max out the memory. + * * @param string $options may be FOR UPDATE * @return array the Title objects linking here * @access public @@ -1612,6 +1630,9 @@ class Title { * Get an array of Title objects using this Title as a template * Also stores the IDs in the link cache. * + * WARNING: do not use this function on arbitrary user-supplied titles! + * On heavily-used templates it will max out the memory. 
+ * * @param string $options may be FOR UPDATE * @return array the Title objects linking here * @access public @@ -1673,6 +1694,15 @@ class Title { ); } + function purgeSquid() { + global $wgUseSquid; + if ( $wgUseSquid ) { + $urls = $this->getSquidURLs(); + $u = new SquidUpdate( $urls ); + $u->doUpdate(); + } + } + /** * Move this page without authentication * @param Title &$nt the new page Title @@ -1953,21 +1983,9 @@ class Title { 'pl_title' => $nt->getDBkey() ), $fname ); - # Non-existent target may have had broken links to it; these must - # now be touched to update link coloring. - $nt->touchLinks(); - # Purge old title from squid # The new title, and links to the new title, are purged in Article::onArticleCreate() - $titles = $nt->getLinksTo(); - if ( $wgUseSquid ) { - $urls = $this->getSquidURLs(); - foreach ( $titles as $linkTitle ) { - $urls[] = $linkTitle->getInternalURL(); - } - $u = new SquidUpdate( $urls ); - $u->doUpdate(); - } + $this->purgeSquid(); } /** @@ -2190,44 +2208,18 @@ class Title { } /** - * Update page_touched timestamps on pages linking to this title. - * In principal, this could be backgrounded and could also do squid - * purging. + * Update page_touched timestamps and send squid purge messages for + * pages linking to this title. May be sent to the job queue depending + * on the number of links. Typically called on create and delete. 
*/ function touchLinks() { - $fname = 'Title::touchLinks'; - - $dbw =& wfGetDB( DB_MASTER ); - - $res = $dbw->select( 'pagelinks', - array( 'pl_from' ), - array( - 'pl_namespace' => $this->getNamespace(), - 'pl_title' => $this->getDbKey() ), - $fname ); - - $toucharr = array(); - while( $row = $dbw->fetchObject( $res ) ) { - $toucharr[] = $row->pl_from; - } - $dbw->freeResult( $res ); + $u = new HTMLCacheUpdate( $this, 'pagelinks' ); + $u->doUpdate(); - if( $this->getNamespace() == NS_CATEGORY ) { - // Categories show up in a separate set of links as well - $res = $dbw->select( 'categorylinks', - array( 'cl_from' ), - array( 'cl_to' => $this->getDbKey() ), - $fname ); - while( $row = $dbw->fetchObject( $res ) ) { - $toucharr[] = $row->cl_from; - } - $dbw->freeResult( $res ); + if ( $this->getNamespace() == NS_CATEGORY ) { + $u = new HTMLCacheUpdate( $this, 'categorylinks' ); + $u->doUpdate(); } - - if (!count($toucharr)) - return; - $dbw->update( 'page', /* SET */ array( 'page_touched' => $dbw->timestamp() ), - /* WHERE */ array( 'page_id' => $toucharr ),$fname); } function trackbackURL() { -- 2.20.1